library(tidybayes)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.32.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
library(bayesplot)
## This is bayesplot version 1.14.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
library(posterior)
## This is posterior version 1.6.1
##
## Attaching package: 'posterior'
##
## The following object is masked from 'package:bayesplot':
##
## rhat
##
## The following objects are masked from 'package:stats':
##
## mad, sd, var
##
## The following objects are masked from 'package:base':
##
## %in%, match
library(loo)
## This is loo version 2.8.0
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
library(bayesrules)
library(dplyr)
library(broom.mixed)
library(rstan)
## Loading required package: StanHeaders
##
## rstan version 2.32.7 (Stan version 2.32.2)
##
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
##
##
## Attaching package: 'rstan'
##
## The following objects are masked from 'package:posterior':
##
## ess_bulk, ess_tail
##
## The following object is masked from 'package:tidyr':
##
## extract
# Load Dataset
wine <- read.csv("WineQT.csv", header = TRUE)
set.seed(36501)
## Data Preparation (Defining the Discrete Outcome)
# A wine counts as "high quality" when quality >= 7 (coded 1, otherwise 0)
wine_flagged <- wine |>
  mutate(Is_High_Quality = ifelse(quality >= 7, 1, 0))
# Sample statistics: number of trials N and number of successes Y
N <- nrow(wine_flagged)
Y <- sum(wine_flagged$Is_High_Quality)
cat("Total number of wines(N):", N, "\n")
## Total number of wines(N): 1143
cat("Number of high-quality wines (Y):", Y, "\n")
## Number of high-quality wines (Y): 159
Interpretation:
For the discrete parameter analysis, we focused on estimating θ (theta), the proportion of high-quality wines in the dataset. We transformed the quality variable (originally ranging from 3 to 8) into a binary outcome by defining wines with quality ≥ 7 as “high quality” (coded as 1) and wines below this threshold as “not high quality” (coded as 0).
# Model: Y ~ Binomial(N, theta), theta ~ Beta(alpha_0, beta_0)
# Beta(1, 1) hyperparameters give a flat (uniform) prior over [0, 1],
# the conjugate choice for a Binomial likelihood.
alpha_0 <- 1.0
beta_0 <- 1.0
# Prior: θ ~ Beta(1, 1) (Uniform Prior)
Explanation:
We choose the Beta(1, 1) prior, which is a non-informative Uniform prior. This prior assigns equal probability density to all possible values of the proportion θ (from 0 to 1). It is the conjugate prior for the Binomial likelihood, guaranteeing a closed-form posterior solution.
# Conjugate Beta-Binomial update: posterior is Beta(alpha_0 + Y, beta_0 + N - Y)
alpha_N <- alpha_0 + Y
beta_N <- beta_0 + N - Y
# Closed-form posterior summaries of theta (mean, SD, central 95% interval)
ab_total <- alpha_N + beta_N
theoretical_summary <- data.frame(
  Mean = alpha_N / ab_total,
  SD = sqrt(alpha_N * beta_N / (ab_total^2 * (ab_total + 1))),
  CI_Lower = qbeta(0.025, alpha_N, beta_N),
  CI_Upper = qbeta(0.975, alpha_N, beta_N)
)
print(theoretical_summary)
## Mean SD CI_Lower CI_Upper
## 1 0.139738 0.01024189 0.1202705 0.1603973
# Stan program for the Beta-Binomial model:
#   theta ~ Beta(alpha_0, beta_0);  Y ~ Binomial(N, theta)
# (Kept byte-identical: the program is a runtime string passed to stan().)
stan_quality_model <- "
data {
int<lower=0> N;
int<lower=0> Y;
real<lower=0> alpha_0;
real<lower=0> beta_0;
}
parameters {
real<lower=0, upper=1> theta;
}
model {
theta ~ beta(alpha_0, beta_0);
Y ~ binomial(N, theta);
}
"
# Fit model
# 4 chains x 10000 iterations; stan() defaults to warmup = iter/2,
# giving 4 x 5000 = 20000 post-warmup draws. Seed fixed for reproducibility.
quality_stan_fit <- stan(
model_code = stan_quality_model,
data = list(N = N, Y = Y, alpha_0 = alpha_0, beta_0 = beta_0),
chains = 4,
iter = 10000,
seed = 36501
)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 7e-06 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.022 seconds (Warm-up)
## Chain 1: 0.028 seconds (Sampling)
## Chain 1: 0.05 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 2e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.022 seconds (Warm-up)
## Chain 2: 0.022 seconds (Sampling)
## Chain 2: 0.044 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 2e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.022 seconds (Warm-up)
## Chain 3: 0.022 seconds (Sampling)
## Chain 3: 0.044 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 2e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.02 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.022 seconds (Warm-up)
## Chain 4: 0.022 seconds (Sampling)
## Chain 4: 0.044 seconds (Total)
## Chain 4:
# Posterior summary for theta from the MCMC fit
print(quality_stan_fit, pars = "theta")
## Inference for Stan model: anon_model.
## 4 chains, each with iter=10000; warmup=5000; thin=1;
## post-warmup draws per chain=5000, total post-warmup draws=20000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## theta 0.14 0 0.01 0.12 0.13 0.14 0.15 0.16 6687 1
##
## Samples were drawn using NUTS(diag_e) at Tue Dec 9 20:28:50 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Trace Plot
# Trace plot: well-mixed, stationary chains indicate convergence
mcmc_trace(quality_stan_fit, pars = "theta")
Density Plot
# Density Plot
# Overlaid per-chain densities should coincide at convergence
mcmc_dens_overlay(quality_stan_fit, pars = "theta")
Autocorrelation plot
# Autocorrelation should decay quickly for an efficient sampler
mcmc_acf(quality_stan_fit, pars = "theta")
R-hat values
# Split-chain R-hat for theta; values below 1.01 indicate convergence
rhat_value_disc <- summary(quality_stan_fit, pars = "theta")$summary[, "Rhat"]
cat("R-hat value:", rhat_value_disc, "\n")
## R-hat value: 1.000607
R-hat = 1.00: Excellent convergence (< 1.01 threshold).
Effective sample size ratio
# Ratio of effective sample size to total post-warmup draws
neff_ratio(quality_stan_fit, pars = "theta")
## [1] 0.3343606
The ESS ratio of 0.334 indicates that about one-third of our 20,000 post-warmup MCMC draws (4 chains × 5,000) are effectively independent, which is sufficient for reliable posterior inference.
# Extract posterior samples of theta.
# rstan:: is spelled out because tidyr also exports extract(); relying on
# package load order to resolve the masking is fragile.
theta_samples <- as.numeric(rstan::extract(quality_stan_fit)$theta)
# Numerical comparison: closed-form Beta posterior vs MCMC draws
comparison_discrete <- data.frame(
  Method = c("Theoretical (Beta)", "MCMC (Stan)"),
  Mean = c(theoretical_summary$Mean, mean(theta_samples)),
  SD = c(theoretical_summary$SD, sd(theta_samples)),
  CI_2.5 = c(theoretical_summary$CI_Lower, as.numeric(quantile(theta_samples, 0.025))),
  CI_97.5 = c(theoretical_summary$CI_Upper, as.numeric(quantile(theta_samples, 0.975)))
)
print(comparison_discrete)
## Method Mean SD CI_2.5 CI_97.5
## 1 Theoretical (Beta) 0.1397380 0.01024189 0.1202705 0.1603973
## 2 MCMC (Stan) 0.1398676 0.01033205 0.1205440 0.1606909
# Graphical comparison: MCMC density overlaid on the exact Beta posterior
theta_grid <- seq(0.05, 0.25, length.out = 1000) # Adjust range based on data
theoretical_density_disc <- dbeta(theta_grid, alpha_N, beta_N)
posterior_disc_plot <- ggplot() +
  geom_density(data = data.frame(theta = theta_samples),
               aes(x = theta, fill = "MCMC"), alpha = 0.5) +
  # linewidth replaces the line `size` aesthetic deprecated in ggplot2 3.4.0
  geom_line(data = data.frame(theta = theta_grid, density = theoretical_density_disc),
            aes(x = theta, y = density, color = "Theoretical"), linewidth = 1.2) +
  scale_fill_manual(values = c("MCMC" = "steelblue")) +
  scale_color_manual(values = c("Theoretical" = "black")) +
  labs(title = "Posterior Distribution: MCMC vs Theoretical (High Quality Proportion)",
       x = "θ (Proportion of High Quality Wines)", y = "Density") +
  theme_minimal() +
  theme(legend.title = element_blank())
print(posterior_disc_plot)
Interpretation:
The close alignment between MCMC and theoretical results validates our MCMC implementation and confirms the accuracy of the sampling algorithm. This comparison is possible because the Beta-Binomial model has a closed-form conjugate posterior.
# Continuous analysis: posterior for the mean alcohol content mu
alcohol_data <- wine$alcohol
summary(alcohol_data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.40 9.50 10.20 10.44 11.10 14.90
For continuous data, we chose alcohol (μ) as the parameter that we are interested in.
# Prior hyperparameters: mu ~ N(mu_0, sigma_0^2)
mu_0 <- 10
sigma_0 <- 2
# Sample statistics; sigma is treated as known via the plug-in sample SD
n <- length(alcohol_data)
y_bar <- mean(alcohol_data)
sigma_known <- sd(alcohol_data)
Prior: μ ~ N(10, 2²)
Explanation: - μ₀ = 10: typical wine alcohol content is around 10%. - σ₀ = 2: weakly informative, placing roughly 95% prior probability on μ between 6 and 14 (μ₀ ± 2σ₀). - Based on general wine knowledge but allows data to dominate.
Assumed: σ = 1.082 (known, using sample SD)
# Normal-Normal conjugate update in the precision (inverse-variance) scale:
# posterior precision = prior precision + data precision
prior_prec <- 1 / sigma_0^2
data_prec <- n / sigma_known^2
post_prec <- prior_prec + data_prec
mu_n <- (prior_prec * mu_0 + data_prec * y_bar) / post_prec
sigma_n <- sqrt(1 / post_prec)
# Closed-form posterior summaries of mu (mean, SD, central 95% interval)
data.frame(
  Mean = mu_n,
  SD = sigma_n,
  CI_Lower = qnorm(0.025, mu_n, sigma_n),
  CI_Upper = qnorm(0.975, mu_n, sigma_n)
)
## Mean SD CI_Lower CI_Upper
## 1 10.442 0.03200568 10.37927 10.50473
# Stan program for the Normal model with known sigma:
#   mu ~ N(mu_0, sigma_0);  Y_i ~ N(mu, sigma)
# (Kept byte-identical: the program is a runtime string passed to stan().)
stan_model <- "
data {
int<lower=0> n;
vector[n] Y;
real mu_0;
real<lower=0> sigma_0;
real<lower=0> sigma;
}
parameters {
real mu;
}
model {
Y ~ normal(mu, sigma);
mu ~ normal(mu_0, sigma_0);
}
"
# Fit model
# 4 chains x 10000 iterations (half warmup) -> 20000 post-warmup draws;
# sigma is passed in as data (the sample SD), matching the conjugate setup.
stan_fit <- stan(
model_code = stan_model,
data = list(n = n, Y = alcohol_data,
mu_0 = mu_0, sigma_0 = sigma_0,
sigma = sigma_known),
chains = 4,
iter = 10000,
seed = 36501
)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 8e-06 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.049 seconds (Warm-up)
## Chain 1: 0.053 seconds (Sampling)
## Chain 1: 0.102 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 4e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.046 seconds (Warm-up)
## Chain 2: 0.049 seconds (Sampling)
## Chain 2: 0.095 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 5e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.05 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.051 seconds (Warm-up)
## Chain 3: 0.052 seconds (Sampling)
## Chain 3: 0.103 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 4e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.046 seconds (Warm-up)
## Chain 4: 0.047 seconds (Sampling)
## Chain 4: 0.093 seconds (Total)
## Chain 4:
# Posterior summary for mu from the MCMC fit
print(stan_fit, pars = "mu")
## Inference for Stan model: anon_model.
## 4 chains, each with iter=10000; warmup=5000; thin=1;
## post-warmup draws per chain=5000, total post-warmup draws=20000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu 10.44 0 0.03 10.38 10.42 10.44 10.46 10.5 7076 1
##
## Samples were drawn using NUTS(diag_e) at Tue Dec 9 20:30:11 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Trace Plot
# Trace plot: well-mixed, stationary chains indicate convergence
mcmc_trace(stan_fit, pars = "mu")
Interpretation: Chains mix well with no trends, indicating convergence.
Density Plot
# Overlaid per-chain densities should coincide at convergence
mcmc_dens_overlay(stan_fit, pars = "mu")
Interpretation: All chains have similar distributions, confirming convergence.
Autocorrelation plot
# Autocorrelation should decay quickly for an efficient sampler
mcmc_acf(stan_fit, pars = "mu")
R-hat values
# Split-chain R-hat for mu; values below 1.01 indicate convergence
summary(stan_fit, pars = "mu")$summary[, "Rhat"]
## [1] 1.000394
R-hat = 1.00: Excellent convergence (< 1.01 threshold).
Effective sample size ratio
# Ratio of effective sample size to total post-warmup draws
neff_ratio(stan_fit, pars = "mu")
## [1] 0.3538086
An ESS ratio of 0.354 demonstrates highly efficient MCMC sampling with adequate independent samples for the continuous parameter μ.
# Extract posterior samples of mu.
# rstan:: is spelled out because tidyr also exports extract(); relying on
# package load order to resolve the masking is fragile.
posterior_samples <- as.numeric(rstan::extract(stan_fit)$mu)
# Numerical comparison: closed-form Normal posterior vs MCMC draws
comparison <- data.frame(
  Method = c("Theoretical", "MCMC (Stan)"),
  Mean = c(mu_n, mean(posterior_samples)),
  SD = c(sigma_n, sd(posterior_samples)),
  CI_2.5 = c(qnorm(0.025, mu_n, sigma_n),
             as.numeric(quantile(posterior_samples, 0.025))),
  CI_97.5 = c(qnorm(0.975, mu_n, sigma_n),
              as.numeric(quantile(posterior_samples, 0.975)))
)
print(comparison)
## Method Mean SD CI_2.5 CI_97.5
## 1 Theoretical 10.44200 0.03200568 10.37927 10.50473
## 2 MCMC (Stan) 10.44175 0.03200521 10.37922 10.50482
# Graphical comparison: MCMC density overlaid on the exact Normal posterior
mu_grid <- seq(10.3, 10.6, length.out = 1000)
theoretical <- dnorm(mu_grid, mu_n, sigma_n)
ggplot() +
  geom_density(data = data.frame(mu = posterior_samples),
               aes(x = mu, fill = "MCMC"), alpha = 0.5) +
  # linewidth replaces the line `size` aesthetic deprecated in ggplot2 3.4.0
  geom_line(data = data.frame(mu = mu_grid, density = theoretical),
            aes(x = mu, y = density, color = "Theoretical"), linewidth = 1.2) +
  scale_fill_manual(values = c("MCMC" = "steelblue")) +
  scale_color_manual(values = c("Theoretical" = "black")) +
  labs(title = "Posterior: MCMC vs Theoretical",
       x = "μ (mean alcohol %)", y = "Density") +
  theme_minimal()
Interpretation:
The close alignment between MCMC and theoretical posterior validates our MCMC estimation and confirms the accuracy of the sampling algorithm. This comparison is possible because the Normal-Normal conjugate model has a closed-form posterior solution.
#Full model
# Summarise all predictors; quality (mean ~5.66, range 3-8) motivates the
# N(5.5, 1.5) intercept prior used below.
summary(wine) #Prior for intercept, quality centered at roughly 5.5 with 1.5 SE
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.600 Min. :0.1200 Min. :0.0000 Min. : 0.900
## 1st Qu.: 7.100 1st Qu.:0.3925 1st Qu.:0.0900 1st Qu.: 1.900
## Median : 7.900 Median :0.5200 Median :0.2500 Median : 2.200
## Mean : 8.311 Mean :0.5313 Mean :0.2684 Mean : 2.532
## 3rd Qu.: 9.100 3rd Qu.:0.6400 3rd Qu.:0.4200 3rd Qu.: 2.600
## Max. :15.900 Max. :1.5800 Max. :1.0000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 21.00 1st Qu.:0.9956
## Median :0.07900 Median :13.00 Median : 37.00 Median :0.9967
## Mean :0.08693 Mean :15.62 Mean : 45.91 Mean :0.9967
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 61.00 3rd Qu.:0.9978
## Max. :0.61100 Max. :68.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol quality
## Min. :2.740 Min. :0.3300 Min. : 8.40 Min. :3.000
## 1st Qu.:3.205 1st Qu.:0.5500 1st Qu.: 9.50 1st Qu.:5.000
## Median :3.310 Median :0.6200 Median :10.20 Median :6.000
## Mean :3.311 Mean :0.6577 Mean :10.44 Mean :5.657
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10 3rd Qu.:6.000
## Max. :4.010 Max. :2.0000 Max. :14.90 Max. :8.000
## Id
## Min. : 0
## 1st Qu.: 411
## Median : 794
## Mean : 805
## 3rd Qu.:1210
## Max. :1597
# Prior predictive model: prior_PD = TRUE draws from the priors only (the
# likelihood is ignored). autoscale = TRUE rescales the weakly informative
# coefficient and sigma priors to the scale of each predictor.
prior_full_wine_model <- stan_glm(
quality~., data = wine, family = gaussian,
prior_intercept = normal(5.5, 1.5),
prior = normal(0, 2.5, autoscale = TRUE),
prior_aux = exponential(1, autoscale = TRUE),
chains = 4, iter = 2*5000, seed = 36501, prior_PD = TRUE)
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.014847 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 148.47 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.189 seconds (Warm-up)
## Chain 1: 0.193 seconds (Sampling)
## Chain 1: 0.382 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 1.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.217 seconds (Warm-up)
## Chain 2: 0.256 seconds (Sampling)
## Chain 2: 0.473 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1.1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.195 seconds (Warm-up)
## Chain 3: 0.203 seconds (Sampling)
## Chain 3: 0.398 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 1e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.185 seconds (Warm-up)
## Chain 4: 0.204 seconds (Sampling)
## Chain 4: 0.389 seconds (Total)
## Chain 4:
#Use weakly informative priors for the Coefficients and Sigma
#model relationship defined as
#
# data: Y_i | B0, B1,... B12, sigma ~ N(mu_i, sigma^2) with mu_i = B0 + B1*Xi_1 + ... + B12*Xi_12
# priors: B0_c ~ N(5.5, 1.5^2)
# B1,...B12 ~ N(0, 2.5^2) #unassuming location and variance, weak
# sigma ~ Exp(1)
## Density of the response
# Plot 100 predicted draws from the prior model for quality.
# `ndraws` replaces the deprecated `n` alias of add_predicted_draws()
# (tidybayes deprecation warning in the original output).
wine |>
  add_predicted_draws(prior_full_wine_model, ndraws = 100) |>
  ggplot(aes(x = .prediction, group = .draw)) +
  geom_density() +
  xlab("quality")
## Plot fitted (expected) draws from the prior model: quality vs alcohol.
# add_fitted_draws() is deprecated; add_epred_draws() returns the expectation
# of the posterior predictive in the `.epred` column, and `ndraws` replaces `n`.
wine |>
  add_epred_draws(prior_full_wine_model, ndraws = 100) |>
  ggplot(aes(x = alcohol, y = .epred)) +
  geom_line(aes(group = .draw))
## Plot fitted (expected) draws from the prior model: quality vs volatile.acidity.
# add_fitted_draws() is deprecated; add_epred_draws() returns the expectation
# of the posterior predictive in the `.epred` column, and `ndraws` replaces `n`.
wine |>
  add_epred_draws(prior_full_wine_model, ndraws = 100) |>
  ggplot(aes(x = volatile.acidity, y = .epred)) +
  geom_line(aes(group = .draw))
Both plots reflect the weakly informative priors on the coefficients and
sigma: the prior draws span a wide, unassuming range of plausible quality values.
full_wine_model <- update(prior_full_wine_model, prior_PD = FALSE)
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 3.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.33 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.526 seconds (Warm-up)
## Chain 1: 0.958 seconds (Sampling)
## Chain 1: 1.484 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 2.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.564 seconds (Warm-up)
## Chain 2: 0.936 seconds (Sampling)
## Chain 2: 1.5 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1.5e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.567 seconds (Warm-up)
## Chain 3: 0.955 seconds (Sampling)
## Chain 3: 1.522 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 1.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.535 seconds (Warm-up)
## Chain 4: 0.868 seconds (Sampling)
## Chain 4: 1.403 seconds (Total)
## Chain 4:
# Confirm the prior specification actually used by the fitted model
# (autoscale may have adjusted the stated scales); possibly unnecessary.
prior_summary(full_wine_model)
## Priors for model 'full_wine_model'
## ------
## Intercept (after predictors centered)
## ~ normal(location = 5.5, scale = 1.5)
##
## Coefficients
## Specified prior:
## ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
## Adjusted prior:
## ~ normal(location = [0,0,0,...], scale = [ 1.15,11.21,10.24,...])
##
## Auxiliary (sigma)
## Specified prior:
## ~ exponential(rate = 1)
## Adjusted prior:
## ~ exponential(rate = 1.2)
## ------
## See help('prior_summary.stanreg') for more details
summary(full_wine_model, probs = c(0.025, 0.975))
##
## Model Info:
## function: stan_glm
## family: gaussian [identity]
## formula: quality ~ .
## algorithm: sampling
## sample: 20000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 1143
## predictors: 13
##
## Estimates:
## mean sd 2.5% 97.5%
## (Intercept) 23.0 25.0 -25.4 72.4
## fixed.acidity 0.0 0.0 0.0 0.1
## volatile.acidity -1.1 0.1 -1.4 -0.8
## citric.acid -0.1 0.2 -0.5 0.2
## residual.sugar 0.0 0.0 0.0 0.1
## chlorides -1.7 0.5 -2.7 -0.7
## free.sulfur.dioxide 0.0 0.0 0.0 0.0
## total.sulfur.dioxide 0.0 0.0 0.0 0.0
## density -18.8 25.5 -69.0 30.7
## pH -0.4 0.2 -0.9 0.0
## sulphates 0.9 0.1 0.6 1.1
## alcohol 0.3 0.0 0.2 0.3
## Id 0.0 0.0 0.0 0.0
## sigma 0.6 0.0 0.6 0.7
##
## Fit Diagnostics:
## mean sd 2.5% 97.5%
## mean_PPD 5.7 0.0 5.6 5.7
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## (Intercept) 0.3 1.0 8292
## fixed.acidity 0.0 1.0 8423
## volatile.acidity 0.0 1.0 15462
## citric.acid 0.0 1.0 14361
## residual.sugar 0.0 1.0 12244
## chlorides 0.0 1.0 14915
## free.sulfur.dioxide 0.0 1.0 14351
## total.sulfur.dioxide 0.0 1.0 13117
## density 0.3 1.0 8174
## pH 0.0 1.0 9884
## sulphates 0.0 1.0 16601
## alcohol 0.0 1.0 9358
## Id 0.0 1.0 19295
## sigma 0.0 1.0 22015
## mean_PPD 0.0 1.0 21757
## log-posterior 0.0 1.0 7957
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# MCMC diagnostics for the full model: trace plots (mixing), overlaid
# per-chain densities (agreement across chains), and autocorrelation.
mcmc_trace(full_wine_model)
mcmc_dens_overlay(full_wine_model)
mcmc_acf(full_wine_model)
Interpretation:
Diagnostics show quick, well-mixing trace plots with approximately normal densities and low autocorrelation for the features. Rhat values indicate variance stability between and within chains, and effective sample size ratios between 0.4 and 1.1 for the coefficients, sigma, and intercept also point to low autocorrelation. Together these diagnostics suggest the MCMC results are trustworthy.
Estimates from the posterior summary also show that, at the 95% level, only volatile.acidity, chlorides, sulphates, and alcohol have credible intervals that exclude 0. We’ll examine these predictors in a reduced regression model fit with MCMC.
# Reduced model: keep only the predictors whose 95% posterior intervals
# excluded zero in the full model. Same weakly informative priors for the
# coefficients and sigma as before (autoscaled to the data), same seed.
reduced_wine_model <- stan_glm(
  quality ~ volatile.acidity + chlorides + sulphates + alcohol,
  data = wine,
  family = gaussian,
  prior_intercept = normal(5.5, 1.5),
  prior = normal(0, 2.5, autoscale = TRUE),
  prior_aux = exponential(1, autoscale = TRUE),
  chains = 4,
  iter = 10000,  # 5000 warmup + 5000 sampling iterations per chain
  seed = 36501
)
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 2.3e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.159 seconds (Warm-up)
## Chain 1: 0.539 seconds (Sampling)
## Chain 1: 0.698 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 1.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.158 seconds (Warm-up)
## Chain 2: 0.537 seconds (Sampling)
## Chain 2: 0.695 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1.1e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.158 seconds (Warm-up)
## Chain 3: 0.521 seconds (Sampling)
## Chain 3: 0.679 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 1.2e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.159 seconds (Warm-up)
## Chain 4: 0.532 seconds (Sampling)
## Chain 4: 0.691 seconds (Total)
## Chain 4:
summary(reduced_wine_model, probs = c(0.025, 0.975))
##
## Model Info:
## function: stan_glm
## family: gaussian [identity]
## formula: quality ~ volatile.acidity + chlorides + sulphates + alcohol
## algorithm: sampling
## sample: 20000 (posterior sample size)
## priors: see help('prior_summary')
## observations: 1143
## predictors: 5
##
## Estimates:
## mean sd 2.5% 97.5%
## (Intercept) 2.8 0.2 2.4 3.3
## volatile.acidity -1.2 0.1 -1.4 -1.0
## chlorides -1.4 0.5 -2.4 -0.5
## sulphates 0.8 0.1 0.6 1.1
## alcohol 0.3 0.0 0.3 0.3
## sigma 0.6 0.0 0.6 0.7
##
## Fit Diagnostics:
## mean sd 2.5% 97.5%
## mean_PPD 5.7 0.0 5.6 5.7
##
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
##
## MCMC diagnostics
## mcse Rhat n_eff
## (Intercept) 0.0 1.0 22396
## volatile.acidity 0.0 1.0 20878
## chlorides 0.0 1.0 17281
## sulphates 0.0 1.0 16955
## alcohol 0.0 1.0 21505
## sigma 0.0 1.0 24511
## mean_PPD 0.0 1.0 21338
## log-posterior 0.0 1.0 8933
##
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# effective sample size ratio
# Ratio of effective sample size to total draws; values near or above 1
# indicate low autocorrelation in the chains.
neff_ratio(reduced_wine_model)
## (Intercept) volatile.acidity chlorides sulphates
## 1.11980 1.04390 0.86405 0.84775
## alcohol sigma
## 1.07525 1.22555
# Visual diagnostics for the reduced model: trace plots, per-chain density
# overlays, and autocorrelation plots.
mcmc_trace(reduced_wine_model)
mcmc_dens_overlay(reduced_wine_model)
mcmc_acf(reduced_wine_model)
# Posterior predictive checks: density of observed quality vs replicated draws.
pp_check(full_wine_model)
pp_check(reduced_wine_model) #Reduced model closer to data observations; multimodel traits of data is due to discrete values for quality.
# Posterior predictive draws for every observation under each model
# (draws x observations matrices).
full_predictions <- posterior_predict(full_wine_model, newdata = wine)
reduced_predictions <- posterior_predict(reduced_wine_model, newdata = wine)
# 50% / 95% posterior predictive intervals against observed quality,
# plotted over volatile.acidity for each model.
ppc_intervals(y = wine$quality, yrep = full_predictions, x = wine$volatile.acidity, prob = 0.5, prob_outer = 0.95) + labs(x = 'Volatile.acidity' , y = 'Quality', title = 'Full Model')
ppc_intervals(y = wine$quality, yrep = reduced_predictions, x = wine$volatile.acidity, prob = 0.5, prob_outer = 0.95) + labs(x = 'Volatile.acidity' , y = 'Quality', title = 'Reduced Model')
The reduced model's posterior predictive intervals cover the observed data essentially as well as the full model's.
# 10-fold cross-validated posterior predictive accuracy for the full model:
# mean absolute error (raw and scaled) and 50%/95% interval coverage.
full_wine_cv <- prediction_summary_cv(data = wine, model = full_wine_model, k = 10)
full_wine_cv$cv
## mae mae_scaled within_50 within_95
## 1 0.4033039 0.6273625 0.5399542 0.9484058
# Same 10-fold CV summary for the reduced model, for comparison.
reduced_wine_cv <- prediction_summary_cv(data = wine, model = reduced_wine_model, k = 10)
reduced_wine_cv$cv
## mae mae_scaled within_50 within_95
## 1 0.4157651 0.6393778 0.531968 0.9440122
For both models the MAE between predicted and actual quality is small (about 0.4 raw, 0.62 scaled), with roughly 53% of observations falling inside the 50% PPIs and 94% inside the 95% PPIs. Prediction coverage is therefore good, with small error.
#Expected Log-Predictive Densities for model comparison
# Leave-one-out cross-validation estimates of ELPD for each model.
loo_1 <- loo(full_wine_model)
loo_2 <- loo(reduced_wine_model)
# Raw ELPD estimates: higher (less negative) indicates better prediction.
c(loo_1$estimates[1], loo_2$estimates[1])
## [1] -1122.175 -1128.180
# Difference in ELPD with its standard error; best model is listed first.
loo_compare(loo_1, loo_2)
## elpd_diff se_diff
## full_wine_model 0.0 0.0
## reduced_wine_model -6.0 5.9
The full model may have about 6 units of better expected predictive accuracy, but the true difference plausibly lies anywhere within 2 SE (about 11.8 units) of that estimate — an interval that includes 0. Given this small, possibly zero, difference in predictive power, the simpler reduced model is preferable.
# Posterior prediction of quality for a new wine with fixed predictor values
# (volatile.acidity = 0.7, chlorides = 0.076, sulphates = 0.56, alcohol = 9.4).
# For each posterior draw, build mu from the coefficients and simulate one
# quality value from N(mu, sigma). n() supplies the draw count (20000 here)
# instead of hard-coding it, so this survives changes to chains/iter.
wine_new <- as.data.frame(reduced_wine_model) |>
  mutate(
    mu = `(Intercept)` + volatile.acidity * 0.7 + chlorides * 0.076 +
      sulphates * 0.56 + alcohol * 9.4,
    y_new = rnorm(n(), mu, sigma)
  )
# The matching observed wine in the data has quality = 5.
# NOTE(review): exact `==` comparison on doubles is fragile in general;
# it works here because these values are stored exactly as read — confirm
# if the data source changes.
wine_actual_obs <- wine |>
  filter(volatile.acidity == 0.7, chlorides == 0.076,
         sulphates == 0.56, alcohol == 9.4)
# Posterior predictive density for the new wine, with its mean marked.
wine_new |>
  ggplot(aes(x = y_new)) +
  geom_density() +
  geom_vline(xintercept = mean(wine_new$y_new))
# Prediction error relative to the observed quality of 5 (raw and scaled).
wine_new |>
  summarise(mean = mean(y_new), sd = sd(y_new), error = 5 - mean(y_new),
            scaled_error = (5 - mean(y_new)) / sd(y_new))
## mean sd error scaled_error
## 1 5.075293 0.6464516 -0.07529333 -0.1164717
#c
# Fix: matrix(wine_new$y_new, ncol = 1143) was invalid — 20000 draws for a
# single new observation cannot be reshaped into a draws x 1143 yrep matrix
# (20000 is not a multiple of 1143, so values were silently recycled, as the
# warning showed). Use the reduced model's full-dataset posterior predictions,
# which already have one column per observation.
ppc_intervals(wine$quality, yrep = reduced_predictions, x = wine$volatile.acidity)
#Model predictions for the fixed predictor values accurately model the response: the average predicted error from the actual quality is small, as is the scaled error.